## "Where are the missing dead" replication
##
## Reproduce: Randomization test
#
##            - Figure 10
##            
## 
## Input:     main.dta


## Guoer Liu
## 7/31/2022 


setwd("~/Dropbox/Missing dead/replication/")


library(haven)
library(foreign)
library(estimatr)
library(scales)
library(extrafont) ## change fonts in plot

### Function
densPlot <- function(vector, matrix.x.lab, matrix.title){
    # Args:
    #   vector: vector to be plotted
    #   matrix.x.lab:   a string of figure x axis label
    #   matrix.title:   a string of figure title 
    dens.v <- density(vector)
    plot(dens.v, #xlim = c(0.5, x.len), 
             #ylim = c(y.min, y.max), 
             #type = "n", 
             xlab = matrix.x.lab,
             main = matrix.title, 
             ylab = "Density", font = 2,
             cex.main = 1.6, cex.lab = 1.3,
             bty = "n",
             lwd = 2)
    abline(v = 0, col = "gray", lty = 2, lwd = 2)
}



mydata <- read_dta("main.dta", encoding = "UTF-8")
mydata <- as.data.frame(mydata)
mydata <- mydata[mydata$treat_group == 0, ]


outcome <- c("deathsum_py_lg", "totcount_py_lg",
             "traf_deathsum_py_lg", "traf_totcount_py_lg",
             "othr_deathsum_py_lg", "othr_totcount_py_lg")

cov.names <- c("log_pop2", "rural_pop_pct",
               "log_light", "log_total_tax_rev_pc",
               "log_coal_produ0", "log_mine_indwage",
               "log_traf_indwage",
               "sec_tty", "sec_age", 
               "sec_promotion", "gov_tty", 
               "gov_age", "gov_promotion")

t.var <- c("random_did", "treat_time", "random_treat_group")


rhs <- paste('~', paste(c(t.var, cov.names), collapse = " + "), 
                          "+ factor(year) + factor(prov_id)")


iteration <- 5000
random.size <- 4
random.coef <- list()


set.seed(073122)
for(j in outcome){
    estimate <- NULL
    for(i in 1:iteration){
        prov_sample <- sample(unique(mydata$prov_id), 
                              random.size, replace = F) # sample group
        mydata$random_treat_group <- ifelse(mydata$prov_id %in% (prov_sample), 
                                            1, 0)
        mydata$random_did <- mydata$random_treat_group * mydata$treat_time
        formula <- as.formula(paste(j, rhs))
        mod <- lm_robust(formula, cluster = prov_id, data = mydata)
        estimate <- c(estimate, coefficients(mod)['random_did'])
        if(i %% 1000 == 0){
            cat("at outcome", j, ", the", i, "th iteration\n")
        } 
    }
    random.coef[[j]] <- estimate
}




#pdf("f10_randomTest.pdf", family = "Times New Roman",
#     width = 10, height = 6.18)
par(mfcol = c(2, 3), mgp=c(2.5, 0.6, 0), mar = c(4, 5, 4, 2))
densPlot(random.coef[[1]], "DD Coefficient", "Aggregate (Deaths)")
densPlot(random.coef[[2]], "DD Coefficient", "Aggregate (Acc. Counts)")
densPlot(random.coef[[3]], "DD Coefficient", "Traffic (Deaths)")
densPlot(random.coef[[4]], "DD Coefficient", "Traffic (Acc. Counts)")
densPlot(random.coef[[5]], "DD Coefficient", "Others (Deaths)")
densPlot(random.coef[[6]], "DD Coefficient", "Others (Acc. Counts)")
#dev.off()


















